# Clear working environment
rm(list = ls())


# Script functions: 
# 1. Recodes main variables for analysis. 
# 2. Produces figures in text. 
# 3. Produces regression analyses for appendix. 
# 4. Calculates sample values for Appendix Table 1. 
# 5. Exports regression results for appendix as txt files. 
# 6. Exports Figures 1, 2, 3, 4, and 5 in text as tiff files. 

# Load packages 
library(ggplot2)
library(dplyr)
library(ggpubr)
library(stringr)
library(stargazer)


# set working directory 
#setwd("")

# Import Data
data <- read.csv("Ebb_and_Flow_Survey_Data.csv")

##### Main recoding of variables for analysis. 
data <-  data %>% mutate(female = ifelse(gender == "Female",1,0), 
                         college = ifelse(educ == "College",1,0), 
                         hs = ifelse(educ == "High School",1,0), 
                         married = ifelse(marstat == "Married",1,0), 
                         single = ifelse(marstat == "Umarried/Single",1,0), 
                         income_u500 = ifelse(faminc == "Between 251 and 500 dollars per month." | faminc == "Under 250 dollars per month.",1,0),
                         income_o1000 = ifelse(faminc == "More than 1000 dollars per month.",1,0), 
                         lowland = ifelse(location...27 == "On a low-lying atoll or near the sea.",1,0), 
                         usfollows2 = recode(usfollows,  "None" = 0, .missing = 0, "A few" = 1, "Many" = 2, "Most" = 3), 
                         planmove = ifelse(move == "Yes",1,0), 
                         persrisk = persrisk_1 + persrisk_2 + persrisk_3, 
                         climate_perception_1 = recode(climate_perception_1, "Strongly Disagree" = 4, "Disagree" = 3, "Agree" = 2, "Strongly Agree" = 1), 
                         climate_perception_2 = recode(climate_perception_2, "Strongly Agree" = 4, "Agree" = 3, "Disagree" = 2, "Strongly Disagree" = 1), 
                         climate_perception_3 = recode(climate_perception_3, "Strongly Disagree" = 4, "Disagree" = 3, "Agree" = 2, "Strongly Agree" = 1), 
                         seriousness = climate_perception_1 + climate_perception_2 + climate_perception_3,
                         ownlowland = ifelse(ownlandwhere == "On a low-lying atoll or near the sea.",1,0), 
                         goodhealth = ifelse(health == "Good. I need almost no medical attention.",1,0), 
                         hasjob = ifelse(jobhas == "Yes, I have a job.",1,0), 
                         childep = ifelse(children == "None",0,1), 
                         adultdep = ifelse(adepend == "None",0,1), 
                         dependents = ifelse(childep + adultdep >0,1,0),
                         familyuscom2 = ifelse(familyuscom == "Daily" | familyuscom == "Weekly","Regularly","Rarely"),
                         familyuscom2 = ifelse(familyus == "No","No Family",familyuscom2), 
                         famuscom = ifelse(familyuscom == "Daily" | familyuscom == "Weekly",1,0), 
                         famusyesno = ifelse(familyus == "Yes",1,0))


## Seriousness perception and connection low land location: 
data$newland <- ifelse(data$lowland == 1 | data$ownlowland == 1, 1,0)
data$newland <- ifelse(data$ownland == "No.",0, data$newland)

# Create categorical variable for terciles of personal risk perception. 
data$g.groups <- cut(data$persrisk, quantile(data$persrisk, c(0,1/3,2/3,1),na.rm = TRUE))


#############################################################################################################################
#############################################################################################################################
###### FIGURES FOR TEXT #### ################################################################################################
#############################################################################################################################
#############################################################################################################################


#############################################################################################################################
##### FIGURE 1: Risk Perception and Low-Land
# Calculate averages, standard deviations, and sample sizes for each tercile group. 
avs <- as.numeric(tapply(data$newland, data$g.groups, mean, na.rm = TRUE))
sds <- as.numeric(tapply(data$newland, data$g.groups, sd, na.rm = TRUE))
ns <- as.numeric(table(data$g.groups))
# Compute upper and lower bounds of 95% confidence intervals. 
upper <- avs + 1.96*sds/sqrt(ns)
lower <- avs - 1.96*sds/sqrt(ns)
# Put point estimates, upper, and lower bounds into dataframe. 
bardata <- data.frame(avs,upper,lower, x = c(1,2,3))
## Tests of Difference between the three groups, and two-sided p-values. 
z12 <- (avs[1] - avs[2])/sqrt(sds[1]^2/ns[1] + sds[2]^2/ns[2])
z23 <- (avs[2] - avs[3])/sqrt(sds[3]^2/ns[3] + sds[2]^2/ns[2])
z13 <- (avs[1] - avs[3])/sqrt(sds[3]^2/ns[3] + sds[1]^2/ns[1])
p12 <- 2*pnorm(abs(z12), lower.tail = F)
p23 <- 2*pnorm(abs(z23), lower.tail = F)
p13 <- 2*pnorm(abs(z13), lower.tail = F)
# Differences between groups. 
d12 <- avs[1] - avs[2]
d23 <- avs[2] - avs[3]
d13 <- avs[1] - avs[3]

# Produce Figure 1
Figure_1 <- ggplot(bardata) + geom_bar(aes(x = as.factor(x), y = avs), stat = "identity", fill = "gray") + 
  geom_errorbar(aes(x = as.factor(x), ymin = lower, ymax = upper), width = 0.2) + 
  theme_bw() + xlab("Terciles of Personal Risk Perceptions") + ylab("Proportion on Low Land") + 
  ylim(c(0,1))  + 
  geom_bracket(xmin = "1", xmax = "2", y.position = 0.85, label = paste(round(-d12,3), "(p =",round(p12,3),")"), label.size = 3 ) + 
  geom_bracket(xmin = "2", xmax = "3", y.position = 0.9, label = paste(round(-d23,3), "(p =",round(p23,3),")"), label.size = 3 ) +
  geom_bracket(xmin = "1", xmax = "3", y.position = 0.95, label = paste(round(-d13,3), "(p =",round(p13,3),")"), label.size = 3 )


#############################################################################################################################
##### FIGURE 2: Risk perception, planning to move, and God's protection
### Left Panel: Planning to move and terciles of risk perception 
# Calculate averages, standard deviations, group sizes, and upper and lower bounds for 95% confidence intervals
avs <- as.numeric(tapply(data$planmove, data$g.groups, mean, na.rm = TRUE))
sds <- as.numeric(tapply(data$planmove, data$g.groups, sd, na.rm = TRUE))
ns <- as.numeric(table(data$g.groups))
upper <- avs + 1.96*sds/sqrt(ns)
lower <- avs - 1.96*sds/sqrt(ns)
# Collect data into dataframe
bardata <- data.frame(avs,upper,lower, x = c(1,2,3))
## Tests of Difference: 
z12 <- (avs[1] - avs[2])/sqrt(sds[1]^2/ns[1] + sds[2]^2/ns[2])
z23 <- (avs[2] - avs[3])/sqrt(sds[3]^2/ns[3] + sds[2]^2/ns[2])
z13 <- (avs[1] - avs[3])/sqrt(sds[3]^2/ns[3] + sds[1]^2/ns[1])
p12 <- 2*pnorm(abs(z12), lower.tail = F)
p23 <- 2*pnorm(abs(z23), lower.tail = F)
p13 <- 2*pnorm(abs(z13), lower.tail = F)
# Differences between groups. 
d12 <- avs[1] - avs[2]
d23 <- avs[2] - avs[3]
d13 <- avs[1] - avs[3]

p.moving.risk <- ggplot(bardata) + geom_bar(aes(x = as.factor(x), y = avs), stat = "identity", fill = "gray") + 
  geom_errorbar(aes(x = as.factor(x), ymin = lower, ymax = upper), width = 0.2) + 
  theme_bw() + xlab("Terciles of Personal Risk Perceptions") + ylab("Proportion Planning Move") + 
  ylim(c(0,0.6))  + 
  geom_bracket(xmin = "1", xmax = "2", y.position = 0.30, label = paste(round(-d12,3), "(p =",round(p12,3),")"), label.size = 3 ) + 
  geom_bracket(xmin = "2", xmax = "3", y.position = 0.45, label = paste(round(-d23,3), "(p =",round(p23,3),")"), label.size = 3 ) +
  geom_bracket(xmin = "1", xmax = "3", y.position = 0.50, label = paste(round(-d13,3), "(p =",round(p13,3),")"), label.size = 3 )


### Right Panel: God's protection and risk perceptions. 
# Rescaled personal risk perceptions (rescale by the 3 11 point scales so its between 0 and 1)
# Calculate averages ,standard deviations, sample sizes, and bounds of 95% confidence intervals. 
avs <- as.numeric(tapply(data$persrisk/33, data$climate_perception_3, mean, na.rm = TRUE))
sds <- as.numeric(tapply(data$persrisk/33, data$climate_perception_3, sd, na.rm = TRUE))
ns <- as.numeric(table(data$climate_perception_3))
upper <- avs + 1.96*sds/sqrt(ns)
lower <- avs - 1.96*sds/sqrt(ns)
# collect into dataframe. 
bardata <- data.frame(avs,upper,lower, x = c("Str Disagree","Disagree","Agree","Str Agree"))
# Rename categories for plot
bardata$x <- factor(bardata$x, levels = c("Str Disagree","Disagree","Agree","Str Agree"))
## Tests of Difference: 
z12 <- (avs[1] - avs[2])/sqrt(sds[1]^2/ns[1] + sds[2]^2/ns[2])
z23 <- (avs[2] - avs[3])/sqrt(sds[3]^2/ns[3] + sds[2]^2/ns[2])
z34 <- (avs[3] - avs[4])/sqrt(sds[3]^2/ns[3] + sds[4]^2/ns[4])
z14 <- (avs[1] - avs[4])/sqrt(sds[1]^2/ns[1] + sds[4]^2/ns[4])
p12 <- 2*pnorm(abs(z12), lower.tail = F)
p23 <- 2*pnorm(abs(z23), lower.tail = F)
p34 <- 2*pnorm(abs(z34), lower.tail = F)
p14 <- 2*pnorm(abs(z14), lower.tail = F)
# Differences between groups (by belief in God's protection)
d12 <- avs[1] - avs[2]
d23 <- avs[2] - avs[3]
d34 <- avs[3] - avs[4]
d14 <- avs[1] - avs[4]

# Produce plot 
p.risk.god <- ggplot(bardata) + geom_bar(aes(x = as.factor(x), y = avs), stat = "identity", fill = "gray") + 
  geom_errorbar(aes(x = as.factor(x), ymin = lower, ymax = upper), width = 0.2) + 
  theme_bw() + xlab("God will protect me") + ylab("Personal Risk Perceptions") + 
  ylim(c(0,1))  +
  #  geom_bracket(xmin = "Str Disagree", xmax = "Disagree", y.position = 0.87, label = paste(round(-d12,3), "(p =",round(p12,3),")"), label.size = 3 ) + 
  #  geom_bracket(xmin = "Disagree", xmax = "Agree", y.position = 0.83, label = paste(round(-d23,3), "(p =",round(p23,3),")"), label.size = 3 ) +
  #  geom_bracket(xmin = "Agree", xmax = "Str Agree", y.position = 0.86, label = paste(round(-d34,3), "(p < 0.001)"), label.size = 3 ) +
  geom_bracket(xmin = "Str Disagree", xmax = "Str Agree", y.position = 0.96, label = paste(round(-d14,3), "(p =",round(p14,3),")"), label.size = 3 ) 


### Middle Panel: Planning to move and belief in God's protection
# Calculate averages, standard deviations, sample sizes, and confidence interval bounds. 
avs <- as.numeric(tapply(data$planmove, data$climate_perception_3, mean, na.rm = TRUE))
sds <- as.numeric(tapply(data$planmove, data$climate_perception_3, sd, na.rm = TRUE))
ns <- as.numeric(table(data$climate_perception_3))
upper <- avs + 1.96*sds/sqrt(ns)
lower <- avs - 1.96*sds/sqrt(ns)
# Collect in dataframe and rename categories for plot. 
bardata <- data.frame(avs,upper,lower, x = c("Str Disagree","Disagree","Agree","Str Agree"))
bardata$x <- factor(bardata$x, levels = c("Str Disagree","Disagree","Agree","Str Agree"))

## Tests of Difference: 
z12 <- (avs[1] - avs[2])/sqrt(sds[1]^2/ns[1] + sds[2]^2/ns[2])
z14 <- (avs[1] - avs[4])/sqrt(sds[4]^2/ns[4] + sds[1]^2/ns[1])
p12 <- 2*pnorm(abs(z12), lower.tail = F)
p14 <- 2*pnorm(abs(z14), lower.tail = F)
# Differences between categories 
d12 <- avs[1] - avs[2]
d14 <- avs[1] - avs[4]

p.moving.god <- ggplot(bardata) + geom_bar(aes(x = x, y = avs), stat = "identity", fill = "gray") + 
  geom_errorbar(aes(x = x, ymin = lower, ymax = upper), width = 0.2) + 
  theme_bw() + xlab("God will protect me") + ylab("Proportion planning move") + 
  ylim(c(0,0.8))  + 
  geom_bracket(xmin = "Str Disagree", xmax = "Str Agree", y.position = 0.75, label = paste(round(-d14,3), "(p <0.001)"), label.size = 3 ) + 
  geom_bracket(xmin = "Str Disagree", xmax = "Disagree", y.position = 0.65, label = paste(round(-d12,3), "(p <0.001)"), label.size = 3 ) 

### Collect panels to Produce Figure 2
Figure_2 <- ggarrange(p.moving.risk, p.moving.god, p.risk.god, nrow = 1)


#############################################################################################################################
##### FIGURE 3: Connections with US Family and Migration Plans. 
### Left Panel: Communications and Plans to move. 
# Calculate averages, standard deviations, sample sizes, and confidence interval bounds. 
avs <- as.numeric(tapply(data$planmove, data$familyuscom2, mean, na.rm = TRUE))
sds <- as.numeric(tapply(data$planmove, data$familyuscom2, sd, na.rm = TRUE))
ns <- as.numeric(table(data$familyuscom2))
upper <- avs + 1.96*sds/sqrt(ns)
lower <- avs - 1.96*sds/sqrt(ns)
lower[1] <- 0 # To avoid it being below zero
# collect in dataframe 
bardata <- data.frame(avs,upper,lower, x = names(table(data$familyuscom2)))

## Tests of Difference: 
z12 <- (avs[1] - avs[2])/sqrt(sds[1]^2/ns[1] + sds[2]^2/ns[2])
z23 <- (avs[2] - avs[3])/sqrt(sds[2]^2/ns[2] + sds[3]^2/ns[3])
z13 <- (avs[1] - avs[3])/sqrt(sds[1]^2/ns[1] + sds[3]^2/ns[3])
p12 <- 2*pnorm(abs(z12), lower.tail = F)
p23 <- 2*pnorm(abs(z23), lower.tail = F)
p13 <- 2*pnorm(abs(z13), lower.tail = F)
d12 <- avs[1] - avs[2]
d23 <- avs[2] - avs[3]
d13 <- avs[1] - avs[3]

# Produce left panel 
p.fam.com <- ggplot(bardata, aes(x = x)) + geom_bar(aes(x = x, y = avs), stat = "identity", fill = "gray") + 
  geom_errorbar(aes(x = x, ymin = lower, ymax = upper), width = 0.2) + 
  theme_bw() + xlab("US Family & Communications") + ylab("Proportion planning move") + 
  ylim(c(0,0.3)) + 
  geom_bracket(xmin = "No Family", xmax = "Rarely", y.position =0.20 , label = paste(round(-d12,3), "(p =",round(p12,3),")"), label.size = 3 ) + 
  geom_bracket(xmin = "Rarely", xmax = "Regularly", y.position = 0.25, label = paste(round(-d23,3), "(p =",round(p23,3),")"), label.size = 3 ) + 
  geom_bracket(xmin = "No Family", xmax = "Regularly", y.position = 0.30, label = paste(round(-d13,3), "(p =",round(p13,3),")"), label.size = 3 )

### Right Panel: Encouragement to migrate and plans to migrate. 
# REcode variable of encouragement. 
data$familyusjoin2 <- data$familyusjoin
data$familyusjoin2 <- ifelse(data$familyus == "No", "No Family", data$familyusjoin2)
data$familyusjoin2 <- ifelse(data$familyusjoin2 == "No","Not Encouraged", data$familyusjoin2)
data$familyusjoin2 <- ifelse(data$familyusjoin2 == "Yes","Encouraged", data$familyusjoin2)
# Rename factor levels. 
data$familyusjoin2 <- factor(data$familyusjoin2, levels = c("No Family","Not Encouraged","Encouraged"))
# Calculate averages, standard deviations, sample sizes, and confidence interval bouns for each category. 
avs <- as.numeric(tapply(data$planmove, data$familyusjoin2, mean, na.rm = TRUE))
sds <- as.numeric(tapply(data$planmove, data$familyusjoin2, sd, na.rm = TRUE))
ns <- as.numeric(table(data$familyusjoin2))
upper <- avs + 1.96*sds/sqrt(ns)
lower <- avs - 1.96*sds/sqrt(ns)
lower[1] <- 0 # To avoid it being below zero

# collect into dataframe. 
bardata <- data.frame(avs,upper,lower, x = names(table(data$familyusjoin2)))
bardata$x <- factor(bardata$x, levels = c("No Family","Not Encouraged","Encouraged"))

## Tests of Difference: 
z12 <- (avs[1] - avs[2])/sqrt(sds[1]^2/ns[1] + sds[2]^2/ns[2])
z23 <- (avs[2] - avs[3])/sqrt(sds[2]^2/ns[2] + sds[3]^2/ns[3])
z13 <- (avs[1] - avs[3])/sqrt(sds[1]^2/ns[1] + sds[3]^2/ns[3])
p12 <- 2*pnorm(abs(z12), lower.tail = F)
p23 <- 2*pnorm(abs(z23), lower.tail = F)
p13 <- 2*pnorm(abs(z13), lower.tail = F)
d12 <- avs[1] - avs[2]
d23 <- avs[2] - avs[3]
d13 <- avs[1] - avs[3]

# Produce right panel 
p.fam.join <- ggplot(bardata, aes(x = x)) + geom_bar(aes(x = x, y = avs), stat = "identity", fill = "gray") + 
  geom_errorbar(aes(x = x, ymin = lower, ymax = upper), width = 0.2) + 
  theme_bw() + xlab("US Family & Encouragement to Migrate") + ylab("Proportion planning move") + 
  ylim(c(0,0.45)) + 
  geom_bracket(xmin = "No Family", xmax = "Not Encouraged", y.position =0.20 , label = paste(round(-d12,3), "(p =",round(p12,3),")"), label.size = 3 ) + 
  geom_bracket(xmin = "Not Encouraged", xmax = "Encouraged", y.position = 0.40, label = paste(round(-d23,3), "(p <0.001)"), label.size = 3 ) + 
  geom_bracket(xmin = "No Family", xmax = "Encouraged", y.position = 0.45, label = paste(round(-d13,3), "(p <0.001)"), label.size = 3 )

### Produce Figure 3: 
Figure_3 <- ggarrange(p.fam.com, p.fam.join)


#############################################################################################################################
##### FIGURE 4: Jobs, Income, and Plans to Migrate. 
### Left Panel: Jobs status/search and plans to migrate. 
# Define combined variable on job status (yes/no) and searching for job
data$job.simp <- ifelse(data$jobhas == "Yes, I have a job.", "Has Job", "")
data$job.simp <- ifelse(data$jobhas == "I do not currently have a job.", "No Job",data$job.simp) 
data$job.simp <- ifelse(data$jobhas == "I live off the land.", "No Job", data$job.simp)
data$job.simp.new <- paste(data$job.simp,"/ Not looking", sep = " ") 
data$job.simp.new <- ifelse(data$jobfind == "Yes" & data$job.simp == "No Job", "No Job / Looking", data$job.simp.new)
data$job.simp.new <- ifelse(data$jobfindnew == "Yes"& data$job.simp == "Has Job","Has Job / Looking", data$job.simp.new)

# calculate average, standard deviation, sample sizes, and confidence bounds for categories. 
avs <- as.numeric(tapply(data$planmove, data$job.simp.new, mean, na.rm = TRUE))
sds <- as.numeric(tapply(data$planmove, data$job.simp.new, sd, na.rm = TRUE))
ns <- as.numeric(table(data$job.simp.new))
upper <- avs + 1.96*sds/sqrt(ns)
lower <- avs - 1.96*sds/sqrt(ns)
# collect into dataframe and rename categories for plot. 
bardata <- data.frame(avs,upper,lower, x = names(table(data$job.simp.new)))
bardata$x <- factor(bardata$x, levels = c("Has Job / Not looking","Has Job / Looking","No Job / Not looking","No Job / Looking"))
## Tests of Difference: 
z12 <- (avs[1] - avs[2])/sqrt(sds[1]^2/ns[1] + sds[2]^2/ns[2])
z34 <- (avs[3] - avs[4])/sqrt(sds[3]^2/ns[3] + sds[4]^2/ns[4])
z13 <- (avs[1] - avs[3])/sqrt(sds[1]^2/ns[1] + sds[3]^2/ns[3])
z24 <- (avs[2] - avs[4])/sqrt(sds[2]^2/ns[2] + sds[4]^2/ns[4])
p12 <- 2*pnorm(abs(z12), lower.tail = F)
p34 <- 2*pnorm(abs(z34), lower.tail = F)
p13 <- 2*pnorm(abs(z13), lower.tail = F)
p24 <- 2*pnorm(abs(z24), lower.tail = F)
d12 <- avs[1] - avs[2]
d34 <- avs[3] - avs[4]
d13 <- avs[1] - avs[3]
d24 <- avs[2] - avs[4]

# Produce left panel 
p.jobs <- ggplot(bardata) + geom_bar(aes(x = x, y = avs), stat = "identity", fill = "gray") + 
  geom_errorbar(aes(x = x, ymin = lower, ymax = upper), width = 0.2) + 
  theme_bw() + xlab("Job Status & Search") + ylab("Proportion planning move") + 
  ylim(c(0,0.45)) +  scale_x_discrete(labels = function(x) str_wrap(x,width = 12)) +
  geom_bracket(xmin = "Has Job / Looking", xmax = "Has Job / Not looking", y.position =0.40 , label = paste(round(d12,3), "(p =",round(p12,3),")"), label.size = 3 ) + 
  geom_bracket(xmin = "No Job / Looking", xmax = "No Job / Not looking", y.position = 0.40, label = paste(round(d34,3), "(p =",round(p34,3),")"), label.size = 3 ) 


### Right Panel: Income level (non-remittances) and plans to migrate. 
# recode income levels 
data$faminc <- as.factor(data$faminc)
levels(data$faminc) <- c("251-500","501-750","751-1000",">1000","<250")
data$faminc <- factor(data$faminc, levels = c("<250","251-500","501-750","751-1000",">1000"))
# Produce average, standard deviations, sample sizes, and confidence bounds by category of income. 
avs <- as.numeric(tapply(data$planmove, data$faminc, mean, na.rm = TRUE))
sds <- as.numeric(tapply(data$planmove, data$faminc, sd, na.rm = TRUE))
ns <- as.numeric(table(data$faminc))
upper <- avs + 1.96*sds/sqrt(ns)
lower <- avs - 1.96*sds/sqrt(ns)
lower[4] <- 0 # So that it does not go below the minimum of the plot 
# Collect in dataframe and rename categories for plot. 
bardata <- data.frame(avs,upper,lower, x = names(table(data$faminc)))
bardata$x <- factor(bardata$x, levels = c("<250","251-500","501-750","751-1000",">1000"))

# Tests of Difference: 
z12 <- (avs[1] - avs[2])/sqrt(sds[1]^2/ns[1] + sds[2]^2/ns[2])
z23 <- (avs[2] - avs[3])/sqrt(sds[2]^2/ns[2] + sds[3]^2/ns[3])
z34 <- (avs[3] - avs[4])/sqrt(sds[3]^2/ns[3] + sds[4]^2/ns[4])
z45 <- (avs[4] - avs[5])/sqrt(sds[4]^2/ns[4] + sds[5]^2/ns[5])
z15 <- (avs[1] - avs[5])/sqrt(sds[1]^2/ns[1] + sds[5]^2/ns[5])
p12 <- 2*pnorm(abs(z12), lower.tail = F)
p23 <- 2*pnorm(abs(z23), lower.tail = F)
p34 <- 2*pnorm(abs(z34), lower.tail = F)
p45 <- 2*pnorm(abs(z45), lower.tail = F)
p15 <- 2*pnorm(abs(z15), lower.tail = F)
d12 <- avs[1] - avs[2]
d23 <- avs[2] - avs[3]
d34 <- avs[3] - avs[4]
d45 <- avs[4] - avs[5]
d15 <- avs[1] - avs[5]

# Produce left panel 
p.faminc <- ggplot(bardata, aes(x = x)) + geom_bar(aes(x = x, y = avs), stat = "identity", fill = "gray") + 
  geom_errorbar(aes(x = x, ymin = lower, ymax = upper), width = 0.2) + 
  theme_bw() + xlab("Family Income (Non-Remittances)") + ylab("Proportion planning move") + 
  ylim(c(0,0.45))  + 
  geom_bracket(xmin = "<250", xmax = "251-500", y.position =0.40 , label = paste(round(-d12,3), "(p =",round(p12,3),")"), label.size = 3 ) + 
  geom_bracket(xmin = "251-500", xmax = "501-750", y.position = 0.35, label = paste(round(-d23,3), "(p =",round(p23,3),")"), label.size = 3 ) + 
  geom_bracket(xmin = "501-750", xmax = "751-1000", y.position = 0.30, label = paste(round(-d34,3), "(p =",round(p34,3),")"), label.size = 3 ) +
  geom_bracket(xmin = "751-1000", xmax = ">1000", y.position = 0.20, label = paste(round(-d45,3), "(p =",round(p45,3),")"), label.size = 3 ) + 
  geom_bracket(xmin = "<250", xmax = ">1000", y.position = 0.45, label = paste(round(-d15,3), "(p =<0.001)"), label.size = 3 ) 


### Produce Figure 4: Jobs, Income, and Plans to Migrate. 
Figure_4 <- ggarrange(p.jobs, p.faminc)


#############################################################################################################################
##### FIGURE 5: Personal Health and Plans to migrate. 
# calculate averages, standard deviations, sample sizes, and confidence bounds for categories of health 
avs <- as.numeric(tapply(data$planmove, data$health, mean, na.rm = TRUE))
sds <- as.numeric(tapply(data$planmove, data$health, sd, na.rm = TRUE))
ns <- as.numeric(table(data$health))
upper <- avs + 1.96*sds/sqrt(ns)
lower <- avs - 1.96*sds/sqrt(ns)
lower[3] <- 0 # To avoid it being below zero
# collect into datafram and rename categories for plot
bardata <- data.frame(avs,upper,lower, x = names(table(data$health)))
bardata$x <- ifelse(bardata$x == "Good. I need almost no medical attention.","Good", 
                    ifelse(bardata$x == "Fair. I need medical attention sometimes.","Fair","Not Good"))
bardata$x <- factor(bardata$x, levels = c("Not Good","Fair","Good"))

# Tests of Difference: 
z12 <- (avs[1] - avs[2])/sqrt(sds[1]^2/ns[1] + sds[2]^2/ns[2])
z23 <- (avs[2] - avs[3])/sqrt(sds[2]^2/ns[2] + sds[3]^2/ns[3])
z13 <- (avs[1] - avs[3])/sqrt(sds[1]^2/ns[1] + sds[3]^2/ns[3])
p12 <- 2*pnorm(abs(z12), lower.tail = F)
p23 <- 2*pnorm(abs(z23), lower.tail = F)
p13 <- 2*pnorm(abs(z13), lower.tail = F)
d12 <- avs[1] - avs[2]
d23 <- avs[2] - avs[3]
d13 <- avs[1] - avs[3]

### Produce Figure 5 
Figure_5 <- ggplot(bardata, aes(x = x)) + geom_bar(aes(x = x, y = avs), stat = "identity", fill = "gray") + 
  geom_errorbar(aes(x = x, ymin = lower, ymax = upper), width = 0.2) + 
  theme_bw() + xlab("Personal Health") + ylab("Proportion planning move") + 
  ylim(c(0,0.45)) + 
  geom_bracket(xmin = "Not Good", xmax = "Fair", y.position =0.40 , label = paste(round(d13,3), "(p =",round(p13,3),")"), label.size = 3 ) + 
  geom_bracket(xmin = "Fair", xmax = "Good", y.position = 0.35, label = paste(round(-d12,3), "(p =",round(p12,3),")"), label.size = 3 ) + 
  geom_bracket(xmin = "Not Good", xmax = "Good", y.position = 0.45, label = paste(round(d23,3), "(p =",round(p23,3),")"), label.size = 3 )


#############################################################################################################################
#############################################################################################################################
###### MULTIVARIATE ANALYSIS ################################################################################################
#############################################################################################################################
#############################################################################################################################

###### MULTIVARIATE ANALYSIS 
# Fix god perception (only extreme versus the others)
# Separate having a job from looking for job (regardless of whether they have one). 
# Make income a continuous variable. 

data$job.find.all <- ifelse(is.na(data$jobfind), data$jobfindnew, data$jobfind)
data$job.find.all <- ifelse(data$job.find.all == "Yes", 1,0)
data$nogod <- 1-ifelse(data$climate_perception_3 <2,1,0)
data$encouraged <- ifelse(data$familyusjoin2 == "Encouraged",1,0)
data$notencouraged <- ifelse(data$familyusjoin2 == "Not Encouraged",1,0)
data$college <- ifelse(data$educ == "College", 1,0)
data$HS <- ifelse(data$educ == "High School", 1, 0)
data$nousvisit <- ifelse(data$usvisit == "Never.",1,0)
data$faminc2 <- ifelse(as.numeric(data$faminc) < 2,"Under 250", 
                       ifelse(as.numeric(data$faminc) <=4, "z250-1000", "Over 1000" ))
# Rescale persrisk (3 elevent point scales, add up to 33). 
data$persrisk <- data$persrisk/33

# all regressions use plan to migrate as the outcome variable. 

## Using all factors in the figures except for perception of risk. 
reg1 <- lm(planmove ~ newland  + nogod +  as.factor(familyuscom2) + notencouraged + 
             as.factor(job.simp) +job.find.all + faminc + health , data = data)
# with additional demograpgic controls
reg1c <- lm(planmove ~ newland  + nogod +  as.factor(familyuscom2) + notencouraged + 
              as.factor(job.simp) +job.find.all + faminc + health +
              age + female + dependents + college + HS + nousvisit + single, data = data)


# Using all factors in the figures except for living in low land 
reg2 <- lm(planmove ~ g.groups + nogod + as.factor(familyuscom2) + notencouraged  +
             as.factor(job.simp) +job.find.all + faminc + health, data = data)
# with additional demographic controls 
reg2c <- lm(planmove ~ g.groups + nogod + as.factor(familyuscom2) + notencouraged  +
              as.factor(job.simp) +job.find.all + faminc + health +
              age + female + dependents + college + HS + nousvisit + single, data = data)


## Using all factors in the figures (including both lowland and risk perception )
reg3 <- lm(planmove ~ newland + g.groups + nogod + as.factor(familyuscom2) + notencouraged  +
             as.factor(job.simp) +job.find.all + faminc + health , data = data)
# with additional demographic controls. 
reg3c <- lm(planmove ~ newland + g.groups + nogod + as.factor(familyuscom2) + notencouraged  +
              as.factor(job.simp) +job.find.all + faminc + health +
              age + female + dependents + college + HS + nousvisit + single, data = data)

# List of variable labels for export Table. 
varlabels = c("Live Low Land", "Risk Perception, T2", "Risk Perception, T3", "God Will Protect", 
              "US Fam: Rarely Comms","US Fam: Regularly Comms", "US Fam Not Encouraged", 
              "Has No Job", "Looking for Job", "Income: 251-500", "Income: 501-750", 
              "Income: 751-1000", "Income: > 1000", "Health: Good", "Health: Not Good")

# Export Appendix Table 2 with models that do not include additional demographics as control. 
stargazer(reg1, reg2, reg3, 
          align = TRUE, column.sep.width = "1pt", 
          digits = 3, no.space = TRUE , df = FALSE, omit.stat = c("rsq","adj.rsq","f","ser"), 
          covariate.labels = varlabels, out="MultivariateAppendixTable.txt")

# List of variable labels for export Table 2. 
varlabels2 <- c(varlabels, "Age", "Female","Has Dependents","Educ: College","Educ: High School", 
                "Never Visit US", "Single")

# Export Appendix Table 3 with models that include additional demographics as controls. 
stargazer(reg1c, reg2c, reg3c, 
          align = TRUE, column.sep.width = "1pt", 
          digits = 3, no.space = TRUE , df = FALSE, omit.stat = c("rsq","adj.rsq","f","ser"), 
          covariate.labels = varlabels2, out="MultivariateAppendixTable2.txt")


### Sample Characteristics for Appendix Table 1: 
# Gender ratio 
sum(data$gender == "Male", na.rm = TRUE)/sum(data$gender == "Female", na.rm = TRUE)
# Children per woman 
(table(data$children[data$gender == "Female"]) %*% c(1,2,3,0))/sum(table(data$children[data$gender == "Female"])[1:3])
# Unemployment rate (not employed but looking, divided by not employed but looking plus employed)
sum(data$jobhas == "I do not currently have a job." & data$jobfind == "Yes",na.rm = TRUE)/(sum(data$jobhas == "I do not currently have a job." & data$jobfind == "Yes",na.rm = TRUE) + sum(data$jobhas == "Yes, I have a job.", na.rm = TRUE))
# Average and mean adult age
mean(data$age, na.rm = TRUE)
median(data$age, na.rm = TRUE)
# Education, complete secondary
mean(data$educ == "High School", na.rm = TRUE)
# Religion: Catholic and protestant. 
mean(data$religion == "Catholic", na.rm = TRUE)
mean(data$religion != "Catholic" & data$religion != "No religious affiliation" & data$religion != "Other", na.rm = TRUE)



### EXPORT FIGURES IN TIFF FORMAT (height 4.31, width 4 per panel)
ggsave("Figure1.tiff", plot = Figure_1, scale = 1, dpi = 300, units = "in", width = 4, height = 4.31)
ggsave("Figure2.tiff", plot = Figure_2, scale = 1, dpi = 300, units = "in", width = 12, height = 4.31 )
ggsave("Figure3.tiff", plot = Figure_3, scale = 1, dpi = 300, units = "in", width = 8, height = 4.31 )
ggsave("Figure4.tiff", plot = Figure_4, scale = 1, dpi = 300, units = "in", width = 8, height = 4.31 )
ggsave("Figure5.tiff", plot = Figure_5, scale = 1, dpi = 300, units = "in", width = 4, height = 4.31 )


